knitr::opts_chunk$set(echo = FALSE, message = FALSE)
library(Seurat)
library(ggplot2)
library(data.table)
library(MAST)
library(SingleR)
library(dplyr)
library(tidyr)
library(limma)
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] limma_3.44.3                tidyr_1.1.1                
##  [3] dplyr_1.0.2                 SingleR_1.2.4              
##  [5] MAST_1.14.0                 SingleCellExperiment_1.10.1
##  [7] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
##  [9] matrixStats_0.56.0          Biobase_2.48.0             
## [11] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [13] IRanges_2.22.2              S4Vectors_0.26.1           
## [15] BiocGenerics_0.34.0         data.table_1.13.0          
## [17] ggplot2_3.3.2               Seurat_3.2.0               
## 
## loaded via a namespace (and not attached):
##   [1] AnnotationHub_2.20.1          BiocFileCache_1.12.1         
##   [3] plyr_1.8.6                    igraph_1.2.5                 
##   [5] lazyeval_0.2.2                splines_4.0.2                
##   [7] BiocParallel_1.22.0           listenv_0.8.0                
##   [9] digest_0.6.25                 htmltools_0.5.0              
##  [11] magrittr_1.5                  memoise_1.1.0                
##  [13] tensor_1.5                    cluster_2.1.0                
##  [15] ROCR_1.0-11                   globals_0.12.5               
##  [17] colorspace_1.4-1              blob_1.2.1                   
##  [19] rappdirs_0.3.1                ggrepel_0.8.2                
##  [21] xfun_0.16                     crayon_1.3.4                 
##  [23] RCurl_1.98-1.2                jsonlite_1.7.0               
##  [25] spatstat_1.64-1               spatstat.data_1.4-3          
##  [27] survival_3.2-3                zoo_1.8-8                    
##  [29] ape_5.4-1                     glue_1.4.1                   
##  [31] polyclip_1.10-0               gtable_0.3.0                 
##  [33] zlibbioc_1.34.0               XVector_0.28.0               
##  [35] leiden_0.3.3                  BiocSingular_1.4.0           
##  [37] future.apply_1.6.0            abind_1.4-5                  
##  [39] scales_1.1.1                  DBI_1.1.0                    
##  [41] miniUI_0.1.1.1                Rcpp_1.0.5                   
##  [43] viridisLite_0.3.0             xtable_1.8-4                 
##  [45] reticulate_1.16               bit_4.0.4                    
##  [47] rsvd_1.0.3                    htmlwidgets_1.5.1            
##  [49] httr_1.4.2                    RColorBrewer_1.1-2           
##  [51] ellipsis_0.3.1                ica_1.0-2                    
##  [53] pkgconfig_2.0.3               uwot_0.1.8                   
##  [55] dbplyr_1.4.4                  deldir_0.1-28                
##  [57] tidyselect_1.1.0              rlang_0.4.7                  
##  [59] reshape2_1.4.4                later_1.1.0.1                
##  [61] AnnotationDbi_1.50.3          munsell_0.5.0                
##  [63] BiocVersion_3.11.1            tools_4.0.2                  
##  [65] generics_0.0.2                RSQLite_2.2.0                
##  [67] ExperimentHub_1.14.1          ggridges_0.5.2               
##  [69] evaluate_0.14                 stringr_1.4.0                
##  [71] fastmap_1.0.1                 yaml_2.2.1                   
##  [73] goftest_1.2-2                 knitr_1.29                   
##  [75] bit64_4.0.2                   fitdistrplus_1.1-1           
##  [77] purrr_0.3.4                   RANN_2.6.1                   
##  [79] pbapply_1.4-3                 future_1.18.0                
##  [81] nlme_3.1-148                  mime_0.9                     
##  [83] compiler_4.0.2                plotly_4.9.2.1               
##  [85] curl_4.3                      png_0.1-7                    
##  [87] interactiveDisplayBase_1.26.3 spatstat.utils_1.17-0        
##  [89] tibble_3.0.3                  stringi_1.4.6                
##  [91] lattice_0.20-41               Matrix_1.2-18                
##  [93] vctrs_0.3.2                   pillar_1.4.6                 
##  [95] lifecycle_0.2.0               BiocManager_1.30.10          
##  [97] lmtest_0.9-37                 RcppAnnoy_0.0.16             
##  [99] BiocNeighbors_1.6.0           cowplot_1.0.0                
## [101] bitops_1.0-6                  irlba_2.3.3                  
## [103] httpuv_1.5.4                  patchwork_1.0.1              
## [105] R6_2.4.1                      promises_1.1.1               
## [107] KernSmooth_2.23-17            gridExtra_2.3                
## [109] codetools_0.2-16              MASS_7.3-52                  
## [111] assertthat_0.2.1              withr_2.2.0                  
## [113] sctransform_0.2.1             GenomeInfoDbData_1.2.3       
## [115] mgcv_1.8-31                   grid_4.0.2                   
## [117] rpart_4.1-15                  rmarkdown_2.3                
## [119] DelayedMatrixStats_1.10.1     Rtsne_0.15                   
## [121] shiny_1.5.0

Introduction

In v2 of the analysis we decided to include the control mice from the Nbeal experiment with the Migr1 and Mpl mice. The thought is that it may be good to have another control, since the Migr1 control was irradiated and had a bone marrow transplantation. I’m going to split the Rmarkdown files into separate parts, to better organize my analysis.

This File

Here the controls from the Nbeal experiment and the data from the Migr1/Mpl experiment are going to be integrated together, and we are going to do all the normalization, scaling, and dimensionality reduction. I’m following the integration guide from the Satija Lab and some of my previous files (integrating_with_hwbm_to…).

Loading the data and getting HTO labels

Load the raw matrices supplied by 10X for the two experiments and find the cells that have an HTO label.

Migr1/Mpl Experiment

We started out with 11,278 cells in the experiment. After basic filtering when creating the Seurat object (cells with a minimum of 200 features, and genes expressed in a minimum of 3 cells) we had 10,157 cells. There were only 7,542 cells that had an HTO label, of which 7,228 passed the basic Seurat filtering.

Dashed lines indicate the cutoffs I used for subsetting the Seurat dataset. We end up with 6,121 cells for the Migr1/Mpl experiment. The Venn diagram is just for our use, and can be made nicer if we wish to publish something like that.

Nbeal Controls

We started with 6,376 cells that passed basic Seurat filtering, and **1,991** were labeled with either the HTO3 or HTO4 tag, which indicates they came from the control mouse.

## 
## enrNbeal_cntrl    Nbeal_cntrl 
##           1069            922
## 
## TRUE 
## 1991
## 
## FALSE  TRUE 
##    43  1948
## 
## enrNbeal_cntrl    Nbeal_cntrl 
##           1037            911

We end up with 1,754 control cells from the Nbeal experiment (normal and enriched).

Integrating the datasets

## Warning: Adding a command log without an assay associated with it

## [1] "Going with the first 20 PCs"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 1.0000
## Number of communities: 2
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9826
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9729
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9643
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9574
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9513
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9453
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9394
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9347
## Number of communities: 19
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9303
## Number of communities: 19
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9260
## Number of communities: 19
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9216
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9175
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9136
## Number of communities: 22
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9100
## Number of communities: 22
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 22
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9031
## Number of communities: 23
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8999
## Number of communities: 23
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8965
## Number of communities: 23
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8934
## Number of communities: 24
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8899
## Number of communities: 24
## Elapsed time: 0 seconds

## [1] "Going with index 7 = 0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7875
## Number of edges: 329769
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9453
## Number of communities: 15
## Elapsed time: 0 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## Warning: Adding a command log without an assay associated with it

## [1] "Going with the first 20 PCs"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 1.0000
## Number of communities: 2
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9822
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9724
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9647
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9572
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9498
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9429
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9372
## Number of communities: 16
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9325
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9283
## Number of communities: 18
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9241
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9199
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9161
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9123
## Number of communities: 22
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9085
## Number of communities: 23
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9049
## Number of communities: 23
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9012
## Number of communities: 23
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8976
## Number of communities: 24
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8941
## Number of communities: 25
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8911
## Number of communities: 25
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8885
## Number of communities: 25
## Elapsed time: 0 seconds

## [1] "Going with index 7 = 0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8668
## Number of edges: 373759
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9429
## Number of communities: 15
## Elapsed time: 0 seconds

## 
##       enrMigr1         enrMpl enrNbeal_cntrl          Migr1            Mpl 
##            653           1315           1037           2144           2608 
##    Nbeal_cntrl 
##            911
## 
##   Mpl Nbeal 
##  6720  1948